home *** CD-ROM | disk | FTP | other *** search
- /*-------------------------- METAPHON.C -------------------
- *
- * Author: Gary Parker
- * Compilers: Borland C++ 2.0, Turbo C++, Turbo C 2.0, MSC 6.0
- * Memory model: any
- *
- * This is an implementation of the Metaphone algorithm developed
- * by Lawrence Philips. Like the Soundex algorithm, it compares
- * words that sound alike but are spelled differently. Metaphone
- * was designed to overcome difficulties encountered with Soundex.
- *
- * Usage:
- * The calling function must pass three arguments:
- *
- * char *Word - the word to be converted to a 'metaph'
- * char *Metaph - a MAXMETAPH+1 byte field for the result
- * int Flag - a flag
- *
- * If flag is 1 then a 'metaph' will be computed for Word and
- * stored in Metaph. If flag is 0, then the function will compute
- * a 'metaph' for Word and compare it with the 'metaph', passed in
- * Metaph. It will return 0 for a match, else -1. The function
- * will also return -1 if Word is 0 bytes long.
- *
- * This code is hereby placed in the public domain.
- *----------------------------------------------------------------------*/
-
- /*------------Metaphone Transformations----------------------------------
- * PN- GN- KN- N
- * AE- E
- * WR- R
- * WH- H
- * X- S
- *
- * B B (unless in -MB)
- * C X if in -CIA-, -CH-
- * else S if in -CI-, -CE-, -CY-
- * else dropped if in -SCI-, -SCE-, -SCY-
- * else K
- * D J if in -DGE-, -DGI-, -DGY-
- * else T
- * G F if in -GH and not B--GH, D--GH,-H--GH, -H---GH
- * else dropped if -GNED, -GN, -DGE-, -DGI-, -DGY-
- * else J if in -GE-, -GI-, -GY- and not GG
- * else K
- * H H if before a vowel and not after C, G, P, S, T
- * else dropped
- * K dropped if after C, else K
- * P F if before H, else P
- * Q K
- * S X in -SIO- or -SIA-, else S
- * T X in -TIA- or -TIO-
- * else O before H
- * else T
- * V F
- * W W after a vowel, else dropped
- * X KS
- * Y Y unless followed by a vowel
- * Z S
- *
- * Note: F, J, L, M, N, R are never transformed.
- *
- *----------------------------------------------------------------------*/
-
- #include <ctype.h>
-
- #define MAXMETAPH 4
-
- int metaphone(char *,char *, int);
-
- /* Character coding array */
- static char vsvfn[26] = {
- 1,16,4,16,9,2,4,16,9,2,0,2,2,2,1,4,0,2,4,4,1,0,0,0,8,0};
- /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */
-
- /* Macros to access character coding array */
- #define vowel(x) (vsvfn[(x) - 'A'] & 1) /* AEIOU */
- #define same(x) (vsvfn[(x) - 'A'] & 2) /* AEIOU */
- #define varson(x) (vsvfn[(x) - 'A'] & 4) /* AEIOU */
- #define frontv(x) (vsvfn[(x) - 'A'] & 8) /* AEIOU */
- #define noghf(x) (vsvfn[(x) - 'A'] & 16) /* AEIOU */
-
- int metaphone(char *Word, char *Metaph, int Flag)
- {
- char *n, *n_start, *n_end; /* pointers to string */
- char *metaph, *metaph_end; /* pointers to metaph */
- char ntrans[32]; /* word with uppercase letters */
- char newm[8]; /* new metaph for comparison */
- int KSflag; /* state flag for X translation */
-
- /*
- ** Copy Word to internal buffer, dropping non-alphabetic
- ** charactes and converting to uppercase.
- */
- for(n = ntrans + 1, n_end = ntrans + 30;
- *Word && n < n_end; Word++)
- if(isalpha(*Word)) *n++ = toupper(*Word);
-
- if(n == ntrans + 1) return -1; /* return if 0 bytes */
- n_end = n; /* set n_end to end of string */
-
- /* ntrans[0] will always be == 0 */
- *n++ = 0; *n = 0; /* pad with nulls */
- n = ntrans + 1; /* assign pointer to start */
-
- /* if doing a comparision, redirect pointers */
- if(!Flag) { metaph = Metaph; Metaph = newm; }
-
- /* check for PN KN GN AE WR WH and X at start */
- switch(*n) {
- case 'P': case 'K': case 'G':
- if(*(n + 1) == 'N') *n++ = 0;
- break;
- case 'A':
- if(*(n + 1) == 'E') *n++ = 0;
- break;
- case 'W':
- if(*(n + 1) == 'R') *n++ = 0;
- else if(*(n + 1) == 'H') {
- *(n + 1) = *n;
- *n++ = 0;
- }
- break;
- case 'X':
- *n = 'S';
- break;
- }
-
- /*
- ** Now, loop step through string, stopping at end of string
- ** or when the computed 'metaph' is MAXMETAPH characters long
- */
- KSflag = 0; /* state flag for KS translation */
- for(metaph_end = Metaph + MAXMETAPH, n_start = n;
- n <= n_end && Metaph < metaph_end; n++) {
-
- if (KSflag) {
- KSflag = 0;
- *Metaph++= *n;
- }
- else {
- /* drop duplicates except for CC */
- if(*(n - 1) == *n && *n != 'C') continue;
-
- /* check for F J L M N R or first letter vowel */
- if(same(*n) || (n == n_start && vowel(*n)))
- *Metaph++ = *n;
- else switch(*n) {
- case 'B':
- if(n < n_end || *(n - 1) != 'M')
- *Metaph++ = *n;
- break;
- case 'C':
- if(*(n - 1) != 'S' || !frontv(*(n + 1))) {
- if(*(n + 1) == 'I' && *(n + 2) == 'A')
- *Metaph++ = 'X';
- else if(frontv(*(n + 1))) *Metaph++ = 'S';
- else if(*(n + 1) == 'H')
- *Metaph++ = ((n == n_start &&
- !vowel(*(n + 2))) ||
- *(n - 1) == 'S')
- ? (char)'K' : (char)'X';
- else *Metaph++ = 'K';
- }
- break;
- case 'D':
- *Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
- ? (char)'J' : (char)'T';
- break;
- case 'G':
- if((*(n + 1) != 'H' || vowel(*(n + 2))) &&
- (*(n + 1) != 'N' || ((n + 1) < n_end &&
- (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
- (*(n - 1) != 'D' || !frontv(*(n + 1))))
- *Metaph++ = (frontv(*(n + 1)) &&*(n + 2) != 'G')
- ? (char)'J' : (char)'K';
- else if(*(n + 1) == 'H' && !noghf(*(n - 3))
- && *(n - 4) != 'H') *Metaph++ = 'F';
- break;
- case 'H':
- if(!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
- vowel(*(n + 1)))) *Metaph++ = 'H';
- break;
- case 'K':
- if(*(n - 1) != 'C') *Metaph++ = 'K';
- break;
- case 'P':
- *Metaph++ = *(n + 1) == 'H'
- ? (char)'F' : (char)'P';
- break;
- case 'Q':
- *Metaph++ = 'K';
- break;
- case 'S':
- *Metaph++ = (*(n + 1) == 'H' || (*(n + 1) == 'I'
- && (*(n + 2) == 'O' || *(n + 2) == 'A')))
- ? (char)'X' : (char)'S';
- break;
- case 'T':
- if(*(n + 1) == 'I' && (*(n + 2) == 'O'
- || *(n + 2) == 'A'))
- *Metaph++ = 'X';
- else if(*(n + 1) == 'H') *Metaph++ = 'O';
- else if(*(n + 1) != 'C' || *(n + 2) != 'H')
- *Metaph++ = 'T';
- break;
- case 'V':
- *Metaph++ = 'F';
- break;
- case 'W': case 'Y':
- if(vowel(*(n + 1))) *Metaph++ = *n;
- break;
- case 'X':
- if(n == n_start) *Metaph++ = 'S';
- else {
- *Metaph++ = 'K'; /* insert K, then S */
- KSflag = 1; /* this flag will cause S to be
- inserted on next pass thru loop */
- }
- break;
- case 'Z':
- *Metaph++ = 'S';
- break;
- }
- }
-
- /* compare new metaph with old */
- if(!Flag && *(Metaph - 1) != metaph[(Metaph - newm) - 1])
- return -1;
- }
-
- /* If comparing, check if metaphs were equal in length. */
- if(!Flag && metaph[Metaph - newm])
- return -1;
-
- *Metaph = 0; /* null terminate return value */
- return 0;
- }
-